# Set the working directory
# NOTE: placeholder. setwd() needs a directory path as its argument; as
# written, with no argument, the call stops the script with an error. Supply
# the folder that contains the dataset CSV files read further down (they are
# all opened by bare file name, i.e. relative to this directory).
setwd() #fill in

# Load required libraries
library(smacof)
library(vegan)

# File name stems of the datasets to analyse (example: the pooled datasets)
datasets <- c("gemini00_pooled", "gemini07_pooled")

# Lookup from the value column names found in the dataset CSVs to the short
# codes used as variable labels. It is a named character vector: the names are
# the CSV column names and the elements are the short codes.
alias_mapping <- local({
  # One row per value: column name, short code
  alias_pairs <- matrix(
    c(
      "c_Self.direction.Thought", "SDT",
      "c_Self.direction.Action", "SDA",
      "c_Stimulation", "ST",
      "c_Hedonism", "HE",
      "c_Achievement", "AC",
      "c_Power.Dominance", "POD",
      "c_Power.Resources", "POR",
      "c_Face", "FAC",
      "c_Security.Personal", "SEP",
      "c_Security.Societal", "SES",
      "c_Tradition", "TR",
      "c_Conformity.Rules", "COR",
      "c_Conformity.Interpersonal", "COI",
      "c_Humility", "HUM",
      "c_Universalism.Nature", "UNN",
      "c_Universalism.Concern", "UNC",
      "c_Universalism.Tolerance", "UNT",
      "c_Benevolence.Care", "BEC",
      "c_Benevolence.Dependability", "BED"
    ),
    ncol = 2,
    byrow = TRUE
  )
  setNames(alias_pairs[, 2], alias_pairs[, 1])
})

# Compute and save the correlation matrix of the value scores in one dataset.
#
# Reads "<dataset_name>.csv", keeps its first 19 columns, renames them to the
# short codes in `alias_mapping`, computes their correlation matrix from
# pairwise-complete observations and writes it to
# "<dataset_name>_correlation_matrix.csv".
#
# Args:
#   dataset_name: File name stem (without the ".csv" extension) of the dataset.
#
# Returns:
#   The correlation matrix, invisibly.
#
# Stops with an error if any of the first 19 columns has no entry in
# `alias_mapping`.
analyze_dataset <- function(dataset_name) {
  # Read the CSV file and extract the value columns
  raw_data <- read.csv(paste0(dataset_name, ".csv"), header = TRUE)
  values <- raw_data[, 1:19]

  # Vectorised lookup of the short codes; unname() drops the lookup keys so
  # only the codes themselves become column names
  aliases <- unname(alias_mapping[colnames(values)])
  unmapped <- colnames(values)[is.na(aliases)]
  if (length(unmapped) > 0) {
    # Without this check the affected columns would silently be named NA
    stop(
      "No alias defined for column(s): ",
      paste(unmapped, collapse = ", "),
      call. = FALSE
    )
  }
  colnames(values) <- aliases

  print("Data loaded successfully:")

  # Correlation matrix; each coefficient uses all rows complete for that pair
  r <- cor(values, use = "pairwise.complete.obs")

  # Save correlation matrix to CSV file
  output_file_csv <- paste0(dataset_name, "_correlation_matrix.csv")
  write.csv(r, file = output_file_csv)

  # Return the matrix that was written (previously an undefined object was
  # returned, which made every call fail after the file had been saved)
  invisible(r)
}

# Read the saved correlation matrices back in; the first CSV column supplies
# the row labels. The two pooled Gemini files are examples of the pooled
# datasets. All three files must already exist in the working directory.
gemini00_pooled_correlation_matrix <- read.csv(
  "gemini00_pooled_correlation_matrix.csv",
  row.names = 1,
  header = TRUE
)
gemini07_pooled_correlation_matrix <- read.csv(
  "gemini07_pooled_correlation_matrix.csv",
  row.names = 1,
  header = TRUE
)
human_correlation_matrix <- read.csv(
  "human_correlation_matrix.csv",
  row.names = 1,
  header = TRUE
)

# Theoretical starting configuration (according to the literature) for the MDS:
# 19 points spaced evenly around the unit circle, the first one at 0 degrees.
step_deg <- 360 / 19
# Build the angles by repeated addition, starting one step below zero, so each
# value is exactly the running sum a step-by-step loop would produce.
angles_deg <- Reduce(`+`, rep(step_deg, 19), -step_deg, accumulate = TRUE)[-1]
angles_rad <- angles_deg * pi / 180
# Column 1 holds the x coordinates (cosines), column 2 the y coordinates (sines)
config19 <- matrix(c(cos(angles_rad), sin(angles_rad)), nrow = 19, ncol = 2)

# Perform MDS for all datasets using smacofSym

# Step 1: convert each correlation matrix into dissimilarities
gemini00_pooled_dissMDS <- sim2diss(
  gemini00_pooled_correlation_matrix,
  method = "corr"
)
gemini07_pooled_dissMDS <- sim2diss(
  gemini07_pooled_correlation_matrix,
  method = "corr"
)
human_dissMDS <- sim2diss(human_correlation_matrix, method = "corr")

# Step 2: ordinal MDS on each set of dissimilarities, every fit starting from
# the same theoretical configuration (config19)
gemini00_pooled_aus <- smacofSym(
  gemini00_pooled_dissMDS,
  type = "ordinal",
  init = config19
)
gemini07_pooled_aus <- smacofSym(
  gemini07_pooled_dissMDS,
  type = "ordinal",
  init = config19
)
human_aus <- smacofSym(human_dissMDS, type = "ordinal", init = config19)


# Procrustes analysis to normalize the embeddings between LLM and human data.
# The human configuration is passed first (the target); each LLM configuration
# is passed second (the one fitted to the target).
# The MDS fits are stored as gemini00_pooled_aus / gemini07_pooled_aus; the
# shorter names gemini00_aus / gemini07_aus used here before are never defined
# in this script, so the calls failed with "object not found".
# NOTE(review): protest() assesses significance by permutation, so p-values may
# vary slightly between runs unless the RNG seed is fixed. Confirm whether a
# set.seed() call is wanted.
gemini00_pooled_procrustes <- protest(human_aus$conf, gemini00_pooled_aus$conf)
gemini07_pooled_procrustes <- protest(human_aus$conf, gemini07_pooled_aus$conf)